set.seed(1)x <- (1:100)y <-integer(100)errors <-rnorm(100, mean =0 , sd =0.2)y_function <-function(a, b){# A function that generates all the "y" values ## Args:# a - the "x" input value# b - the "error term" associated with that value## Return:# y value return(sin(a/10)+b)}for (i in1:100) { y[i] <-y_function(x[i], errors[i])}write.csv(x, file ="_raw_data/Prac2_xvalues.csv") #saving our xwrite.csv(y, file ="_raw_data/Prac2_yvalues.csv") #saving our ywrite.csv(errors, file ="_raw_data/Prac2_error_values.csv") #saving our errors
3.2 Question 2:
3.2.1 Instructions:
Implement the LOWESS Algorithm:
Define a function customLowess(x, y, f) that returns the smoothed values.
3.2.2 Answer:
Code
pairwise_distance <-function(x) {# Computes a matrix of absolute differences between x–values.## Args:# x - A numeric vector.## Returns:# A symmetric matrix D where D[i, j] = |x[i] - x[j]|. num_obs <-length(x) D <-matrix(0, num_obs, num_obs) # Initialize an n x n matrixfor (i in1:num_obs) {for (j in1:num_obs) { D[i, j] <-abs(x[i] - x[j]) } }return(D)}find_k_nearest <-function(i, D, k) {# Finds the indices of the k nearest neighbors (excluding the point itself)# for the i-th observation based on the distance matrix D.## Args:# i - Index of the targeted observation.# D - The symmetric matrix difference D .# k - The number of neighbors to select.## Returns:# A vector of indices corresponding to the k nearest neighbors of x[i]. row_distances <- D[i, ] ordered_indices <-order(row_distances) nearest_indices <- ordered_indices[2:(k +1)] # The first index is i itself (distance = 0), so we take the next k indices.return(nearest_indices)}customLowess <-function(x, y, f) {# Performs LOWESS smoothing on (x, y) data using a fixed fraction f of neighbors.## Args:# x - Numeric vector of independent variable values.# y - Numeric vector of dependent variable values.# f - Fraction (span) of points to use for local regression (default is 0.3).## Returns:# A data frame with the original x values and the corresponding smoothed y_hat values. num_obs <-length(x) k <-ceiling(f * num_obs) D <-pairwise_distance(x) y_hat <-numeric(num_obs) # Initialize vector to hold the y_hat (smoothed) y values.for (i in1:num_obs) { k_nearest_neighbors <-find_k_nearest(i, D, k) d_max <-max(D[i, (k_nearest_neighbors)]) weights <-numeric(length(k_nearest_neighbors)) # Initialize vector to hold the weightsfor (j inseq_along(k_nearest_neighbors)) { neighbor_idx <- k_nearest_neighbors[j] weights[j] <- (1- (D[i, neighbor_idx] / d_max)^3)^3 } x_coordinates_in_neighborhood <- x[k_nearest_neighbors] y_coordinates_in_neighborhood <- y[k_nearest_neighbors] X <-cbind(1, x_coordinates_in_neighborhood) W <-diag(weights) beta_hat <-solve(t(X) %*% W %*% X) %*%t(X) %*% W %*% y_coordinates_in_neighborhood y_hat[i] <- beta_hat[1] + beta_hat[2] * x[i] } result <-data.frame(x_obs = x, y_hat_est = y_hat)return(result)}
3.3 Question 3:
3.3.1 Instructions:
Compare with R’s Built-in lowess():
Use the built-in lowess() function with the same f value. You will also need to set the iter argument to 0.
Plot both curves to compare their smoothing values.
3.3.2 Answers:
Code
# Custom LOWESS (your user-defined function; ensure it is in your workspace)result1 <-customLowess(x, y, f =0.3)# Built-in lowess from R (note: iter = 0 means no robust iterations)result2 <-lowess(x, y, f =0.3, iter =0)